# Load the crowdfunding data set from the working directory.
# stringsAsFactors = TRUE keeps text columns such as Region as factors: later
# code calls levels(crowdfunding$Region) and passes Region as a plotting
# colour, both of which need a factor. R >= 4.0 no longer converts strings to
# factors by default, so the conversion is requested explicitly here.
crowdfunding <- read.csv("forqrm.csv", header = TRUE, stringsAsFactors = TRUE)
# Show the first rows as a quick sanity check of the import.
head(crowdfunding)
library(lmtest)
Loading required package: zoo
Attaching package: 'zoo'
The following objects are masked from 'package:base':
as.Date, as.Date.numeric
# Use the State column as the row names of the data frame.
rownames(crowdfunding)<-crowdfunding$State
# Regress the residuals stored in successful.rate2pAdDeg on the poverty rate
# and print the fit summary.
# NOTE(review): successful.rate2pAdDeg is not defined in this part of the
# file; presumably it is the lm fit of successful.rate on pAdDeg -- confirm.
summary(lm(successful.rate2pAdDeg$residuals~crowdfunding$PovRate1))
Call:
lm(formula = successful.rate2pAdDeg$residuals ~ crowdfunding$PovRate1)
Residuals:
Min 1Q Median 3Q Max
-0.197685 -0.048501 -0.000564 0.048829 0.163646
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.15641 0.05357 -2.920 0.00532 **
crowdfunding$PovRate1 1.05668 0.35451 2.981 0.00451 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.07615 on 48 degrees of freedom
Multiple R-squared: 0.1562, Adjusted R-squared: 0.1386
F-statistic: 8.884 on 1 and 48 DF, p-value: 0.004505
# Regress the residuals stored in successful.rate2pAdDeg on the Gini
# coefficient and print the fit summary.
# NOTE(review): successful.rate2pAdDeg is defined outside this part of the
# file -- confirm which model it holds.
summary(lm(successful.rate2pAdDeg$residuals~crowdfunding$GiniCoeff))
Call:
lm(formula = successful.rate2pAdDeg$residuals ~ crowdfunding$GiniCoeff)
Residuals:
Min 1Q Median 3Q Max
-0.195484 -0.051186 0.004758 0.054168 0.147777
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.6411 0.2860 -2.242 0.0296 *
crowdfunding$GiniCoeff 1.4178 0.6319 2.244 0.0295 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.07887 on 48 degrees of freedom
Multiple R-squared: 0.09492, Adjusted R-squared: 0.07606
F-statistic: 5.034 on 1 and 48 DF, p-value: 0.02951
# Two bar charts side by side: project count and success rate, one bar per
# row of the data set. The value axis is suppressed in both (axes = FALSE).
par(mfrow = c(1, 2))
#barplot(crowdfunding$count_of_Grand.Total,names.arg = crowdfunding$State,col="skyblue")
# The stray empty argument (",,") of the original call has been removed.
barplot(
  crowdfunding$count_of_Grand.Total,
  col = "orange",
  axes = FALSE,
  xlab = "State",
  ylab = "Count of Projects",
  main = "Count of Projects by States"
)
barplot(
  crowdfunding$successful.rate,
  col = "skyblue",
  axes = FALSE,
  xlab = "State",
  ylab = "Successful Rate",
  main = "Successful Rate by States"
)
# Two index plots side by side, points coloured by Region.
par(mfrow = c(1, 2))
#count_of_Grand.Total
# The y axis shows the project count (it was mislabelled "Successful Rate").
plot(
  crowdfunding$count_of_Grand.Total,
  col = crowdfunding$Region,
  main = "Count of Project Plot",
  ylab = "Count of Projects",
  xaxt = "n",
  xlab = "State"
)
#axis(side=1,at=c(1,2,3,4,5,6,7,8),labels=c(crowdfunding$State))
# Point colours are the factor codes of Region, so the legend needs one colour
# per level; a fixed 1:3 gets recycled when there are more than three levels.
legend(
  "center",
  legend = levels(crowdfunding$Region),
  cex = 0.8,
  pch = 1,
  col = seq_along(levels(crowdfunding$Region))
)
#successful.rate
plot(
  crowdfunding$successful.rate,
  col = crowdfunding$Region,
  main = "Successful Rate Plot",
  ylab = "Successful Rate",
  xaxt = "n",
  xlab = "State"
)
#axis(side=1,at=c(1,2,3,4,5,6,7,8),labels=c(crowdfunding$State))
legend(
  "bottomleft",
  legend = levels(crowdfunding$Region),
  cex = 0.8,
  pch = 1,
  col = seq_along(levels(crowdfunding$Region))
)
# Box plots by region, side by side: log project count and success rate.
par(mfrow = c(1, 2))
# Log of the project count, one box per region.
boxplot(
  lapply(
    c("Midwest", "Northeast", "South", "West"),
    function(region) {
      log(crowdfunding$count_of_Grand.Total[crowdfunding$Region == region])
    }
  ),
  names = levels(crowdfunding$Region),
  main = "Count of Projects BoxPlot by Region"
)
# Success rate, one box per region.
boxplot(
  lapply(
    c("Midwest", "Northeast", "South", "West"),
    function(region) {
      crowdfunding$successful.rate[crowdfunding$Region == region]
    }
  ),
  names = levels(crowdfunding$Region),
  main = "Successful Rate BoxPlot by Region"
)
#t.test(crowdfunding$successful.rate[crowdfunding$Region=="West"],crowdfunding$successful.rate[crowdfunding$Region=="Northeast"])
#t.test(crowdfunding$count_of_Grand.Total[crowdfunding$Region=="West"],crowdfunding$count_of_Grand.Total[crowdfunding$Region=="Northeast"])
# Calculate the p-values of pairwise Welch t-tests of the success rate
# between regions and store them as a region-by-region table.
# factor() makes this work whether Region was read as a factor or as text,
# and the table is sized from the regions actually present instead of 4 x 4.
sr_regions <- levels(factor(crowdfunding$Region))
sr_by_region <- split(crowdfunding$successful.rate, factor(crowdfunding$Region))
# Column j holds the tests with region j as the first sample; the diagonal
# compares a region with itself and is therefore 1.
sr_p_values <- vapply(
  sr_regions,
  function(location1) {
    vapply(
      sr_regions,
      function(location2) {
        t.test(sr_by_region[[location1]], sr_by_region[[location2]])$p.value
      },
      numeric(1)
    )
  },
  numeric(length(sr_regions))
)
# The (misspelled) object name is kept because later code refers to it.
SR.t.test.p.vlaue <- as.data.frame(
  matrix(sr_p_values, length(sr_regions), length(sr_regions)),
  row.names = sr_regions
)
colnames(SR.t.test.p.vlaue) <- sr_regions
print("Successful Rate by Region")
[1] "Successful Rate by Region"
# Print the success-rate p-value table.
SR.t.test.p.vlaue
#--------------------------------------------------------
# Calculate the p-values of pairwise Welch t-tests of the log project count
# between regions and store them as a region-by-region table.
# factor() makes this work whether Region was read as a factor or as text,
# and the table is sized from the regions actually present instead of 4 x 4.
cp_regions <- levels(factor(crowdfunding$Region))
cp_by_region <- split(
  log(crowdfunding$count_of_Grand.Total),
  factor(crowdfunding$Region)
)
# Column j holds the tests with region j as the first sample; the diagonal
# compares a region with itself and is therefore 1.
cp_p_values <- vapply(
  cp_regions,
  function(location1) {
    vapply(
      cp_regions,
      function(location2) {
        t.test(cp_by_region[[location1]], cp_by_region[[location2]])$p.value
      },
      numeric(1)
    )
  },
  numeric(length(cp_regions))
)
# The (misspelled) object name is kept because later code refers to it.
CP.t.test.p.vlaue <- as.data.frame(
  matrix(cp_p_values, length(cp_regions), length(cp_regions)),
  row.names = cp_regions
)
colnames(CP.t.test.p.vlaue) <- cp_regions
print("Count of Projects by Region ")
[1] "Count of Projects by Region "
# Print the table of pairwise t-test p-values for the log project counts.
CP.t.test.p.vlaue
#--------------------------------------------------------
This article analyses the factors behind the crowdfunding success rate. I hypothesise that education, inequality of family income and the poverty rate may be related to the crowdfunding success rate; in what follows, I analyse these factors.
First, the statistical summary.
### Factors Analysis: Statistical Summary
library(moments)
# Minimum, quartiles, median, mean and maximum of the success rate.
summary(crowdfunding$successful.rate)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.1250 0.3179 0.3636 0.3631 0.4095 0.5484
# Kurtosis of the success rate.
# NOTE(review): kurtosis() presumably comes from the moments package loaded
# above, which reports Pearson's kurtosis (3 for a normal distribution, not
# excess kurtosis) -- confirm.
kurtosis(crowdfunding$successful.rate)
[1] 3.630147
# Minimum, quartiles, median, mean and maximum of the Gini coefficient.
summary(crowdfunding$GiniCoeff)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.4190 0.4400 0.4530 0.4522 0.4658 0.4990
# Kurtosis of the Gini coefficient.
kurtosis(crowdfunding$GiniCoeff)
[1] 2.552647
# Minimum, quartiles, median, mean and maximum of pAdDeg
# (labelled "Higher Education" in the plots of this file).
summary(crowdfunding$pAdDeg)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.06100 0.07950 0.09200 0.09794 0.11000 0.16400
# Kurtosis of pAdDeg.
kurtosis(crowdfunding$pAdDeg)
[1] 3.382781
# Minimum, quartiles, median, mean and maximum of the poverty rate (PovRate1).
summary(crowdfunding$PovRate1)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.0920 0.1212 0.1480 0.1480 0.1705 0.2190
# Kurtosis of the poverty rate (PovRate1).
kurtosis(crowdfunding$PovRate1)
[1] 2.159154
# Box plots of the response and the three candidate factors on one axis.
boxplot(
  crowdfunding$successful.rate,
  crowdfunding$GiniCoeff,
  crowdfunding$pAdDeg,
  crowdfunding$PovRate1,
  names = c("Successful Rate", "GiniCoeff", "Higher Education", "PovRate1"),
  main = "Factors Box Plot"
)
# 2 x 2 grid of index plots, one per variable. The x axis is suppressed with
# xaxt = "n"; the original passed that argument twice in three of the calls.
# The "Adanced" misspelling in the education labels is corrected.
par(mfrow = c(2, 2))
plot(
  crowdfunding$successful.rate,
  col = "red", pch = 10,
  xlab = "State", ylab = "Successful Rate", xaxt = "n",
  main = "Successful Rate Plot"
)
plot(
  crowdfunding$GiniCoeff,
  col = "green", pch = 18,
  xlab = "State", ylab = "GiniCoeff ", xaxt = "n",
  main = "GiniCoeff Plot"
)
plot(
  crowdfunding$pAdDeg,
  col = "blue", pch = 15,
  xlab = "State", ylab = "Advanced Education Rate", xaxt = "n",
  main = "Advanced Education Rate Plot"
)
plot(
  crowdfunding$PovRate1,
  col = "black", pch = 16,
  xlab = "State", ylab = "Poverty Rate", xaxt = "n",
  main = "Poverty Rate Plot"
)
# Hierarchical clustering (Ward's method, "ward.D") on Euclidean distances.
# Only numeric columns are used: passing the whole data frame made dist()
# coerce the text columns to NA ("NAs introduced by coercion"). Columns that
# are entirely NA only rescale every distance by the same factor, so the
# merge order is unchanged; merge heights shrink by that constant.
# NOTE(review): columns are not standardised, so large-valued columns dominate
# the distance -- confirm that is intended.
hc <- hclust(
  dist(crowdfunding[vapply(crowdfunding, is.numeric, logical(1))]),
  method = "ward.D"
)
NAs introduced by coercion
# Draw the dendrogram. plclust() is deprecated ("Use 'plot' instead"), so the
# plot() method for hclust objects is used.
plot(hc)
'plclust' is deprecated.
Use 'plot' instead.
See help("Deprecated")
# Outline the three-cluster cut on the dendrogram drawn just before.
rect.hclust(hc, k = 3)
# Heat map of the pairwise Euclidean distance matrix, without row/column
# labels. Only numeric columns enter the distance, which avoids the
# "NAs introduced by coercion" warning caused by the text columns.
# FALSE is spelled out because F can be reassigned.
heatmap(
  as.matrix(
    dist(
      crowdfunding[vapply(crowdfunding, is.numeric, logical(1))],
      method = "euclidean"
    )
  ),
  labRow = FALSE,
  labCol = FALSE
)
NAs introduced by coercion
# Cut the dendrogram into three clusters; `result` holds one cluster id per
# row of the clustered data.
result <- cutree(hc, k = 3)
as.data.frame(result)
# Pie chart of cluster sizes. pie(result) made each slice proportional to the
# cluster id (1, 2 or 3), which is an arbitrary label, not a quantity.
pie(table(result))
# Cluster id of every row, as bar heights and then as points.
barplot(result, col = "blue")
#table(result)
#summary(result)
plot(result, type = "p")
library(ggplot2)
# Classical multidimensional scaling (cmdscale's default two dimensions) with
# the sign of the coordinates flipped. Only numeric columns enter the
# distance; the text columns were coerced to NA by dist() and merely rescaled
# every distance by a constant.
mds2 <- -cmdscale(
  dist(crowdfunding[vapply(crowdfunding, is.numeric, logical(1))])
)
NAs introduced by coercion
# Empty canvas spanning the MDS coordinates, then each row drawn as its name.
plot(mds2, type = "n", axes = FALSE, ann = FALSE)
text(mds2, labels = rownames(mds2), xpd = NA)
# Three-dimensional classical MDS, keeping the eigenvalues.
# Only numeric columns enter the distance (the text columns were coerced to
# NA by dist()); TRUE is spelled out because T can be reassigned.
mds <- cmdscale(
  dist(crowdfunding[vapply(crowdfunding, is.numeric, logical(1))]),
  k = 3,
  eig = TRUE
)
NAs introduced by coercion
# First two MDS coordinates, plotted with points coloured by cluster id.
x <- mds$points[, 1]
y <- mds$points[, 2]
p <- ggplot(data.frame(x, y), aes(x, y))
p + geom_point(aes(colour = factor(result)), size = 5, alpha = 0.8)
# k-means with five centres and ten random starts.
# NOTE(review): `all` is not defined in this part of the file (here it would
# otherwise resolve to base::all) -- confirm which data set it holds. No seed
# is set, so the clustering can differ between runs.
k2 <- kmeans(all, centers = 5, nstart = 10)
summary(k2)
Length Class Mode
cluster 49 -none- numeric
centers 225 -none- numeric
totss 1 -none- numeric
withinss 5 -none- numeric
tot.withinss 1 -none- numeric
betweenss 1 -none- numeric
size 5 -none- numeric
iter 1 -none- numeric
ifault 1 -none- numeric
library(car)
# Scatter plots (car::scatterplot) relating the success rate to candidate
# factors. Formulas use bare column names with data = crowdfunding instead of
# `crowdfunding$...`, so the axes are labelled with the column names rather
# than the full `crowdfunding$column` expression.
# Log average goal (y) against success rate (x).
scatterplot(
  log(average_of_goal_Grand.Total) ~ successful.rate,
  data = crowdfunding, pch = 19
)
# Success rate (y) against each factor (x).
scatterplot(successful.rate ~ GiniCoeff, data = crowdfunding, pch = 19)
scatterplot(successful.rate ~ PovRate1, data = crowdfunding, pch = 19)
scatterplot(successful.rate ~ Densitym2, data = crowdfunding, pch = 19)
scatterplot(successful.rate ~ pHigh, data = crowdfunding, pch = 19)
scatterplot(successful.rate ~ pBatDeg, data = crowdfunding, pch = 19)
scatterplot(successful.rate ~ pAdDeg, data = crowdfunding, pch = 19)
#redo scatterplot with Successful Rate-PovRate1
# Same pair with the axes swapped: poverty rate (y) against success rate (x).
scatterplot(PovRate1 ~ successful.rate, data = crowdfunding, pch = 19)
# ANOVA table of a model fitted elsewhere.
# NOTE(review): successful.rate2PovRate1 is not defined in this part of the
# file; presumably lm(successful.rate ~ PovRate1) -- confirm.
anova(successful.rate2PovRate1)
Analysis of Variance Table
Response: crowdfunding$successful.rate
Df Sum Sq Mean Sq F value Pr(>F)
crowdfunding$PovRate1 1 0.01157 0.0115683 1.4698 0.2313
Residuals 48 0.37780 0.0078708
# Success rate against poverty rate, coloured by region, with a loess smoother.
# The title is set with ggtitle(): `main` is not a ggplot2 aesthetic, so
# putting it inside aes() never produced a title.
ggplot(crowdfunding, aes(x = PovRate1, y = successful.rate)) +
  geom_point(aes(colour = Region)) +
  geom_smooth(method = "loess") +
  ggtitle("Successful rate~PovRate")
par(mfrow = c(1, 2))
# Distributions of the two variables side by side.
boxplot(
  crowdfunding$successful.rate,
  crowdfunding$PovRate1,
  names = c("Successful Rate", "PovRate1")
)
# Success rate of rows above vs. at-or-below the mean poverty rate.
boxplot(
  crowdfunding$successful.rate[crowdfunding$PovRate1 > mean(crowdfunding$PovRate1)],
  crowdfunding$successful.rate[crowdfunding$PovRate1 <= mean(crowdfunding$PovRate1)],
  col = c("green", "deepskyblue"),
  names = c("Successful%(High PovRate)", "Successful%(Low PovRate)"),
  xlab = "Successful rate by PovRate1"
)
# Welch two-sample t-test on the same split.
t.test(
  crowdfunding$successful.rate[crowdfunding$PovRate1 > mean(crowdfunding$PovRate1)],
  crowdfunding$successful.rate[crowdfunding$PovRate1 <= mean(crowdfunding$PovRate1)]
)
Welch Two Sample t-test
data: crowdfunding$successful.rate[crowdfunding$PovRate1 > mean(crowdfunding$PovRate1)] and crowdfunding$successful.rate[crowdfunding$PovRate1 <= mean(crowdfunding$PovRate1)]
t = -0.01904, df = 43.704, p-value = 0.9849
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.05105839 0.05010288
sample estimates:
mean of x mean of y
0.3628157 0.3632935
# Success rate (x) against poverty rate (y), coloured by region, with a
# lowess smoother (span f = 1/3) drawn as an orange line.
plot(
  crowdfunding$successful.rate,
  crowdfunding$PovRate1,
  pch = 19,
  col = crowdfunding$Region,
  xlab = "Successful Rate",
  ylab = "PovRate1",
  main = "Successful Rate-PovRate1 Plot with lowess line"
)
lines(
  lowess(crowdfunding$successful.rate, crowdfunding$PovRate1, f = 1/3),
  col = "orange"
)
#abline(lm(crowdfunding$successful.rate~crowdfunding$PovRate1),col="orange")
# One legend colour per Region level (a fixed 1:3 is recycled when there are
# more than three levels).
legend(
  "topleft",
  legend = levels(crowdfunding$Region),
  cex = 0.8,
  pch = 19,
  col = seq_along(levels(crowdfunding$Region))
)
# Two-sample Q-Q plot of the two variables.
# NOTE(review): qqplot() sorts both samples, so the Region colours no longer
# identify the region of a plotted point -- consider dropping `col`.
qqplot(
  crowdfunding$successful.rate,
  crowdfunding$PovRate1,
  pch = 1,
  col = crowdfunding$Region,
  main = "QQ plot: (Successful Rate & PovRate)"
)
# Reference line through the first and third quartile pairs of both samples.
# qqline() only takes one sample (its second argument is the logical `datax`),
# so passing the second vector triggered the "condition has length > 1"
# warning, which is an error in current R.
local({
  probs <- c(0.25, 0.75)
  qx <- quantile(crowdfunding$successful.rate, probs, names = FALSE, na.rm = TRUE)
  qy <- quantile(crowdfunding$PovRate1, probs, names = FALSE, na.rm = TRUE)
  slope <- diff(qy) / diff(qx)
  abline(a = qy[1] - slope * qx[1], b = slope, col = "red")
})
the condition has length > 1 and only the first element will be used
# Legend for the preceding plot: one colour per Region level (a fixed 1:3 is
# recycled when there are more than three levels).
legend(
  "topleft",
  legend = levels(crowdfunding$Region),
  pch = 19,
  col = seq_along(levels(crowdfunding$Region))
)
par(mfrow = c(1, 1))
#qqnorm(crowdfunding$successful.rate,col=crowdfunding$Region,xlab="Successful Rate")
#qqline(crowdfunding$successful.rate,col="red")
# Normal Q-Q plot of the poverty rate with its reference line.
qqnorm(crowdfunding$PovRate1, col = crowdfunding$Region, pch = 18, xlab = "PovRate1")
qqline(crowdfunding$PovRate1, col = "red")
# Success rate against the Gini coefficient, coloured by region, with a loess
# smoother. The title is set with ggtitle(): `main` is not a ggplot2
# aesthetic, so putting it inside aes() never produced a title.
ggplot(crowdfunding, aes(x = GiniCoeff, y = successful.rate)) +
  geom_point(aes(colour = Region)) +
  geom_smooth(method = "loess") +
  ggtitle("Successful rate~GiniCoeff")
# ANOVA table of a model fitted elsewhere.
# NOTE(review): successful.rate2GiniCoeff is not defined in this part of the
# file; presumably lm(successful.rate ~ GiniCoeff) -- confirm.
anova(successful.rate2GiniCoeff)
Analysis of Variance Table
Response: crowdfunding$successful.rate
Df Sum Sq Mean Sq F value Pr(>F)
crowdfunding$GiniCoeff 1 0.06236 0.062361 9.1537 0.003981 **
Residuals 48 0.32701 0.006813
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Two panels: raw distributions, then success rate split at the mean Gini.
par(mfrow = c(1, 2))
boxplot(
  list(crowdfunding$successful.rate, crowdfunding$GiniCoeff),
  names = c("Successful rate", "GiniCoeff")
)
# Success rate of rows above vs. at-or-below the mean Gini coefficient.
boxplot(
  list(
    crowdfunding$successful.rate[crowdfunding$GiniCoeff > mean(crowdfunding$GiniCoeff)],
    crowdfunding$successful.rate[crowdfunding$GiniCoeff <= mean(crowdfunding$GiniCoeff)]
  ),
  col = c("darkorchid2", "dodgerblue"),
  names = c("Successful%(High GiniCoeff)", "Successful%(Low GiniCoeff)"),
  xlab = "Successful rate by GiniCoeff"
)
# Welch two-sample t-test on the same split.
t.test(
  crowdfunding$successful.rate[crowdfunding$GiniCoeff > mean(crowdfunding$GiniCoeff)],
  crowdfunding$successful.rate[crowdfunding$GiniCoeff <= mean(crowdfunding$GiniCoeff)]
)
Welch Two Sample t-test
data: crowdfunding$successful.rate[crowdfunding$GiniCoeff > mean(crowdfunding$GiniCoeff)] and crowdfunding$successful.rate[crowdfunding$GiniCoeff <= mean(crowdfunding$GiniCoeff)]
t = 1.6383, df = 43.111, p-value = 0.1086
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.009375926 0.090607105
sample estimates:
mean of x mean of y
0.3833720 0.3427564
# Success rate (x) against Gini coefficient (y) with a lowess smoother.
# `f` is the lowess span and belongs to lowess() only; passing f = 1/3 to
# plot() is not a graphical parameter, so it is dropped there.
plot(
  crowdfunding$successful.rate,
  crowdfunding$GiniCoeff,
  pch = 19,
  col = "blue",
  xlab = "Successful Rate",
  ylab = "GiniCoeff",
  main = "Successful Rate-GiniCoeff Plot with lowess line"
)
lines(
  lowess(crowdfunding$successful.rate, crowdfunding$GiniCoeff, f = 1/3),
  col = "red"
)
# Two-sample Q-Q plot of the two variables.
qqplot(
  crowdfunding$successful.rate,
  crowdfunding$GiniCoeff,
  pch = 19,
  col = "red",
  main = "Q-Q Plot: Successful Rate-GiniCoeff"
)
# Reference line through the first and third quartile pairs of both samples.
# qqline() only takes one sample (its second argument is the logical `datax`),
# so passing the second vector triggered the "condition has length > 1"
# warning, which is an error in current R.
local({
  probs <- c(0.25, 0.75)
  qx <- quantile(crowdfunding$successful.rate, probs, names = FALSE, na.rm = TRUE)
  qy <- quantile(crowdfunding$GiniCoeff, probs, names = FALSE, na.rm = TRUE)
  slope <- diff(qy) / diff(qx)
  abline(a = qy[1] - slope * qx[1], b = slope)
})
the condition has length > 1 and only the first element will be used
#qqnorm(crowdfunding$successful.rate,col="orange",xlab="Successful Rate")
#qqline(crowdfunding$successful.rate,col="red")
par(mfrow = c(1, 1))
# Normal Q-Q plot of the Gini coefficient with its reference line.
qqnorm(crowdfunding$GiniCoeff, col = "blue", pch = 20, xlab = "GiniCoeff")
qqline(crowdfunding$GiniCoeff, col = "red")
# Success rate against pAdDeg, coloured by region, with a loess smoother.
# The title is set with ggtitle(): `main` is not a ggplot2 aesthetic, so
# putting it inside aes() never produced a title. The copied title text named
# GiniCoeff although the x variable is pAdDeg; it now names pAdDeg.
ggplot(crowdfunding, aes(x = pAdDeg, y = successful.rate)) +
  geom_point(aes(colour = Region)) +
  geom_smooth(method = "loess") +
  ggtitle("Successful rate~pAdDeg")
# ANOVA table of a model fitted elsewhere.
# NOTE(review): successful.rate2pAdDeg is not defined in this part of the
# file; presumably lm(successful.rate ~ pAdDeg) -- confirm.
anova(successful.rate2pAdDeg)
Analysis of Variance Table
Response: crowdfunding$successful.rate
Df Sum Sq Mean Sq F value Pr(>F)
crowdfunding$pAdDeg 1 0.05947 0.059469 8.6527 0.005015 **
Residuals 48 0.32990 0.006873
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Two panels: raw distributions, then success rate split at the mean pAdDeg.
# The "Adanced" misspelling in the plot labels is corrected to "Advanced".
par(mfrow = c(1, 2))
boxplot(
  crowdfunding$successful.rate,
  crowdfunding$pAdDeg,
  names = c("Successful rate", "Advanced Education")
)
# Success rate of rows above vs. at-or-below the mean pAdDeg.
boxplot(
  crowdfunding$successful.rate[crowdfunding$pAdDeg > mean(crowdfunding$pAdDeg)],
  crowdfunding$successful.rate[crowdfunding$pAdDeg <= mean(crowdfunding$pAdDeg)],
  col = c("darkorchid2", "dodgerblue"),
  names = c("Successful%(High Advanced Education)", "Successful%(Low Advanced Education)"),
  xlab = "Successful rate by Advanced Education"
)
# Welch two-sample t-test on the same split.
t.test(
  crowdfunding$successful.rate[crowdfunding$pAdDeg > mean(crowdfunding$pAdDeg)],
  crowdfunding$successful.rate[crowdfunding$pAdDeg <= mean(crowdfunding$pAdDeg)]
)
Welch Two Sample t-test
data: crowdfunding$successful.rate[crowdfunding$pAdDeg > mean(crowdfunding$pAdDeg)] and crowdfunding$successful.rate[crowdfunding$pAdDeg <= mean(crowdfunding$pAdDeg)]
t = 3.5483, df = 45.573, p-value = 0.0009119
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
0.03480093 0.12610121
sample estimates:
mean of x mean of y
0.4097258 0.3292747
# Success rate (x) against pAdDeg (y) with a lowess smoother.
# `f` is the lowess span and belongs to lowess() only; passing f = 1/3 to
# plot() is not a graphical parameter, so it is dropped there.
# The "Adanced" misspelling in the labels is corrected to "Advanced".
plot(
  crowdfunding$successful.rate,
  crowdfunding$pAdDeg,
  pch = 19,
  col = "blue",
  xlab = "Successful Rate",
  ylab = "Advanced Education",
  main = "Successful Rate-Advanced Education Plot with lowess line"
)
lines(
  lowess(crowdfunding$successful.rate, crowdfunding$pAdDeg, f = 1/3),
  col = "red"
)
# Two-sample Q-Q plot of the two variables.
qqplot(
  crowdfunding$successful.rate,
  crowdfunding$pAdDeg,
  pch = 19,
  col = "red",
  main = "Q-Q Plot: Successful Rate-Advanced Education"
)
# Reference line through the first and third quartile pairs of both samples.
# qqline() only takes one sample (its second argument is the logical `datax`),
# so passing the second vector triggered the "condition has length > 1"
# warning, which is an error in current R.
local({
  probs <- c(0.25, 0.75)
  qx <- quantile(crowdfunding$successful.rate, probs, names = FALSE, na.rm = TRUE)
  qy <- quantile(crowdfunding$pAdDeg, probs, names = FALSE, na.rm = TRUE)
  slope <- diff(qy) / diff(qx)
  abline(a = qy[1] - slope * qx[1], b = slope)
})
the condition has length > 1 and only the first element will be used
#qqnorm(crowdfunding$successful.rate,col="orange",xlab="Successful Rate")
#qqline(crowdfunding$successful.rate,col="red")
par(mfrow = c(1, 1))
# Normal Q-Q plot of pAdDeg with its reference line.
# The "Adanced" misspelling in the axis label is corrected to "Advanced".
qqnorm(crowdfunding$pAdDeg, col = "blue", pch = 20, xlab = "Advanced Education")
qqline(crowdfunding$pAdDeg, col = "red")